/* 
Create figures with incumbent-level outcomes by share of women at establishment
	// input: $files/inclevel_dataset.dta
	// output: $figures/atblemp_het_by_femxshare.tif
*/ 

* Logging: close any log that is still open, then start a fresh one.
* Both commands run under -capture-, so a failure here does not stop the script.
capture log close
capture log using "$logs/inclevel_het_log", replace


* Data toggles (numeric toggles: 0 = no restriction, 1 = restriction applied)

* singest: worker's baseline establishment must be a single-establishment firm
local singest 0
* bothFM: worker's baseline establishment must employ both men and women at baseline
local bothFM 1
* covered: worker's baseline establishment must be in the geographic coverage of the CBA
local covered 1
* signing: worker's baseline establishment must be a signing establishment
local signing 1
* geoglevel: "" for no geog-yr FEs; "state_mode" for state-yr FEs;
*            "microregion_mode" for microregion-yr FEs
local geoglevel "microregion_mode"
* no2011: 0 = include 2011; 1 = exclude 2011
* NOTE(review): no2011 is not referenced anywhere else in the visible script,
* so 2011 observations are not dropped here -- confirm this is intended.
local no2011 1
* restr: worker must be 18 or older and not be in probation at baseline
local restr 0

* Load the incumbent-level dataset, replacing whatever is in memory
use "$files/inclevel_dataset.dta", clear

* Sample restrictions
* Each pair is "<toggle local> <indicator variable>": when the toggle is
* non-zero, only observations whose indicator equals 1 are retained.
foreach pair in "singest singestwork" "bothFM bothFMwork" "covered coverwork" "signing signwork" {
	gettoken toggle flagvar : pair
	if ``toggle'' {
		keep if `flagvar'==1
	}
}

* Age / probation restriction.
* Stata treats missing as larger than any number, so observations with a
* missing blage are retained, and a missing blten yields prob==0.
if `restr' {
	drop if blage<18
	generate prob = (blten<=3)
	keep if prob==0
}

* Time-varying fixed effects: each *FE variable is a group id for a
* (year x category) cell. The regressions below absorb them via the wildcard *FE.

// industry: blind divided by 1000 (resp. 100) and rounded down
// (ind3d_num is created but not used again in the visible script)
generate ind2d_num = floor(blind/1000)
generate ind3d_num = floor(blind/100)
egen indyrFE = group(year ind2d_num)

// geography: rename so that the value of the geoglevel toggle names a variable
rename (blstate blmr) (state_mode microregion_mode)
egen geogyrFE = group(year `geoglevel')

// tenure: blten divided by 3 and rounded down
generate ten3m = floor(blten/3)
egen tenyrFE = group(year ten3m)

* Clustering variable used in cluster() of every regression below
generate clustergrp = blempl

* event study var
* post = 1 from 2015 onwards, 0 before, missing when year is missing
gen post = (year>=2015) if year!=.
gen treatpost = treatwork*post
gen postgender = post*gender
gen treatgender = treatwork*gender 

* Year dummies yearj1, yearj2, ... (one per distinct year, ascending order)
tab year, gen(yearj)

* Treatment x year interactions interj1, interj2, ...
* The number of interactions, their labels and the position of the reference
* year are taken from the values of year actually present in the data, rather
* than assuming the sample is exactly 2011-2017 with 2014 in 4th position.
quietly levelsof year, local(yearvals)
local nyears : word count `yearvals'
local refpos = 0
forvalues i = 1/`nyears' {
	local yr : word `i' of `yearvals'
	gen interj`i'=treatwork*yearj`i'
	lab var interj`i' "`yr'"
	if `yr' == 2014 {
		local refpos = `i'
	}
}

* Stop early if the reference year is absent instead of zeroing the wrong year
if `refpos' == 0 {
	di as error "reference year 2014 not found in variable year"
	exit 459
}

* Reference period: the 2014 interaction is set to zero
gen zero=0
lab var zero "2014"
replace interj`refpos' = zero

* Inverse of employment: for weighting regressions
* (1/0 and 1/missing both evaluate to missing in Stata)
generate blfeminv = 1/blfem
generate blmalinv = 1/blmal

* blinv takes blfeminv when gender==1 and blmalinv when gender==0;
* it is missing for any other value of gender
generate blinv = cond(gender==1, blfeminv, blmalinv) if inlist(gender, 0, 1)

* Outcome variables
label variable atblemp "At baseline employer"

* Heterogeneity
* unique_obs flags exactly one observation per fakeid_worker, so that workers
* can be counted once in the tabulations below
egen unique_obs = tag(fakeid_worker)

//share of women at baseline
* NOTE(review): the denominator is blemp, while clustergrp is built from blempl;
* confirm both variables exist and that blemp is not resolved by abbreviation.
generate sharewomen_bl = blfem/blemp

/* Baseline % women bins (establishment):
   share0..share4 are 0/1 indicators for [0,.2) [.2,.4) [.4,.6) [.6,.8) [.8,1].
   A missing sharewomen_bl falls in no bin (all five indicators are 0). */
forvalues b = 0/4 {
	local lower=`b'*0.2
	local upper=`b'*0.2+0.2
	generate share`b'=(sharewomen_bl>=`lower' & sharewomen_bl<`upper')
}
* the top bin is closed on the right: a share of exactly 1 belongs to share4
replace share4 = 1 if sharewomen_bl==1

* Percentage of workers (one tagged observation each) in every bin
quietly count if unique_obs==1
local nworkers = r(N)
forvalues b = 0/4 {
	quietly count if share`b'==1 & unique_obs==1
	display "% in share`b' = " (r(N)/`nworkers')*100
}

//for stats
* DID regression for gender==1 in the three lowest bins pooled
reghdfe atblemp treatpost ///
	if (share0==1|share1==1|share2==1) & gender==1 ///
	[aweight=blfeminv], ///
	absorb(fakeid_worker *FE) cluster(clustergrp)

/* Adjust bins: Baseline % women bins (establishment)
   xshare0..xshare2 copy share0..share2; xshare3 merges share3 and share4,
   so it covers shares from 0.6 up to and including 1. */
forvalues i = 0/2 {
	gen xshare`i' = share`i'
}
gen xshare3 = (share3==1 | share4==1)

* Percentage of workers (one tagged observation each) in every adjusted bin.
* The message names xshare, not share, so the log cannot be confused with the
* five-bin tabulation printed earlier.
qui count if unique_obs==1
local denom = r(N)
forvalues i = 0/3 {
	qui count if unique_obs==1 & xshare`i'==1
	local numer = r(N)
	di "% of workers in xshare`i' = " (`numer'/`denom')*100
}

* Percentage of distinct establishments (fakeid_estab) observed in 2014 in
* every adjusted bin. -unique- is a user-written command returning r(unique).
qui unique fakeid_estab if year==2014
local denom = `r(unique)'
forvalues i = 0/3 {
	qui unique fakeid_estab if xshare`i'==1 & year==2014
	local numer = `r(unique)'
	di "% of establishments in xshare`i' = " (`numer'/`denom')*100
}

//for stats
* DID regression for gender==1 in the three lowest adjusted bins pooled
reghdfe atblemp treatpost if (xshare0==1|xshare1==1|xshare2==1) & gender==1 [aweight=blfeminv], absorb(fakeid_worker *FE) cluster(clustergrp)
	

/*** Heterogeneity plot ***/
* Plot heterogeneity in treatment effect by share of women at the firm:
* one DID coefficient on treatpost per adjusted bin (xshare0..xshare3),
* estimated on observations with gender==1, drawn with coefplot and exported.

local depvar atblemp
* counter incremented once per outcome; it is not read in the visible script
local j=1

foreach var of local depvar {

	* estimate and store one model per bin as DDf0..DDf3
	forvalues b = 0/3 {
		eststo DDf`b': reghdfe `var' treatpost if xshare`b'==1 & gender==1 [aweight=blfeminv], absorb(fakeid_worker *FE) cluster(clustergrp)
	}

	* the four stored models are appended into a single plotted series
	coefplot (DDf0, aseq("0-19") \ DDf1, aseq("20-39") \ DDf2, aseq("40-59") \ ///
			DDf3, aseq("60-100") mcol(sourapple1)ciopts(lpatt(solid)lcol(sourapple1%50))) ///
		, ///
		vertical keep(treatpost) title("") ///
		yline(0, lc(sourapple4) lp(solid)) ///
		ytitle("DID estimate: female retention", size(medlarge)) ylabel(-0.02(0.02)0.06, labsize(medium) gmax gmin) ///
		xtitle("Share of women at establishment", size(medlarge)) xlabel(none, labsize(medium)) ///
		plotregion(fcolor(white)) graphregion(fcolor(white)) graphregion(color(white))

	* write the figure to $figures, overwriting any existing file
	graph export "$figures/`var'_het_by_femxshare.tif", replace

	local ++j
}


capture log close
